Single-cell RNA-seq analysis of KC and TNL Pique-Regi placental villous tissue samples

Important References

Minimal QC recommended: Current best practices in single‐cell RNA‐seq analysis: a tutorial Orchestrating Single-cell Analysis ### Compiling Issues This document had trouble compiling via knitr and pandoc. The following thread fixed the issue (https://github.com/rstudio/rstudio/issues/3661).

Load Data

seu <- readRDS(paste0(data_dir, "fetal_assigned_res.0.3_2020-12-16.rda"))
DimPlot(seu, group.by = 'seurat_clusters', label = T, pt.size = .25)

Split into maternal versus fetal subsets.

split <- SplitObject(seu, split.by = 'fetal')
rm(seu) # Clean memory
fetal <- split$Fetal
maternal <- split$Maternal
rm(split) # Clean memory
DimPlot(fetal) + ggtitle("Fetal")

DimPlot(maternal) + ggtitle("Maternal")

Custom function to run fastMNN pipe with

pipe.fast.mnn <- function(seu, batch) {
  seu <- RunFastMNN(object.list = SplitObject(seu, split.by = batch), verbose = T)
  #print(seu@tools$RunFastMNN@metadata$merge.info$lost.var)
  seu <- RunUMAP(seu, reduction = "mnn", dims = 1:30)
  seu <- FindNeighbors(seu, reduction = "mnn", dims = 1:30)
  seu <- FindClusters(seu, res = 0.2)
  return(seu)
}
maternal <- pipe.fast.mnn(maternal, "biorep")
## Computing 2000 integration features
## No variable features found for object1 in the object.list. Running FindVariableFeatures ...
## No variable features found for object2 in the object.list. Running FindVariableFeatures ...
## No variable features found for object3 in the object.list. Running FindVariableFeatures ...
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -2.1438
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.5004
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.5712e-014
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.090619
## No variable features found for object4 in the object.list. Running FindVariableFeatures ...
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -2.2412
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.3221
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.0391e-029
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.090619
## No variable features found for object5 in the object.list. Running FindVariableFeatures ...
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -2.0841
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.5005
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.9079e-014
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.090619
## Warning: Keys should be one or more alphanumeric characters followed by an
## underscore, setting key from mnn.reconstructed_ to mnnreconstructed_
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 23:06:23 UMAP embedding parameters a = 0.9922 b = 1.112
## 23:06:23 Read 5568 rows and found 30 numeric columns
## 23:06:23 Using Annoy for neighbor search, n_neighbors = 30
## 23:06:23 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 23:06:24 Writing NN index file to temp file C:\Users\Kyle\AppData\Local\Temp\Rtmpms6QbV\filed9307a2e439b
## 23:06:24 Searching Annoy index using 1 thread, search_k = 3000
## 23:06:25 Annoy recall = 100%
## 23:06:25 Commencing smooth kNN distance calibration using 1 thread
## 23:06:26 Initializing from normalized Laplacian + noise
## 23:06:26 Commencing optimization for 500 epochs, with 252296 positive edges
## 23:06:40 Optimization finished
## Computing nearest neighbor graph
## Computing SNN
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 5568
## Number of edges: 253293
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9432
## Number of communities: 7
## Elapsed time: 0 seconds
DimPlot(maternal, group.by = "seurat_clusters", label = T, pt.size =.25)

maternal.batch <- pipe.fast.mnn(maternal, "batch")
## Computing 2000 integration features
## No variable features found for object1 in the object.list. Running FindVariableFeatures ...
## No variable features found for object2 in the object.list. Running FindVariableFeatures ...
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -2.3149
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.30103
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.3904e-014
## Warning: Keys should be one or more alphanumeric characters followed by an
## underscore, setting key from mnn.reconstructed_ to mnnreconstructed_
## 23:06:53 UMAP embedding parameters a = 0.9922 b = 1.112
## 23:06:53 Read 5568 rows and found 30 numeric columns
## 23:06:53 Using Annoy for neighbor search, n_neighbors = 30
## 23:06:53 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 23:06:54 Writing NN index file to temp file C:\Users\Kyle\AppData\Local\Temp\Rtmpms6QbV\filed9305c5062f2
## 23:06:54 Searching Annoy index using 1 thread, search_k = 3000
## 23:06:55 Annoy recall = 100%
## 23:06:55 Commencing smooth kNN distance calibration using 1 thread
## 23:06:56 Initializing from normalized Laplacian + noise
## 23:06:56 Commencing optimization for 500 epochs, with 244916 positive edges
## 23:07:10 Optimization finished
## Computing nearest neighbor graph
## Computing SNN
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 5568
## Number of edges: 230126
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9407
## Number of communities: 7
## Elapsed time: 0 seconds
DimPlot(maternal.batch, group.by = "seurat_clusters", label = T, pt.size =.25)

# Function that accepts Seurat  that has been processed up to the clustering step, clusters at desired resolutions (vector), adds cluster identities at different resolutions, and returns Seurat object with resolution cluster identities
Seurat_clustree <- function (seuratObject, resolutions) {
  
  for(hyperparameter in resolutions) {
    print(hyperparameter)
    prefix <- paste0("res.", hyperparameter)
    print(prefix)
    seuratObject <- FindClusters(object = seuratObject, resolution = hyperparameter)
    seuratObject <- AddMetaData(object = seuratObject, metadata = seuratObject$seurat_clusters, col.name = prefix)
  }
  return(seuratObject)
}

resolutions <- seq(from = 0.1, to = 0.8, by = .1)
maternal <- Seurat_clustree(maternal, resolutions)
## [1] 0.1
## [1] "res.0.1"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 5568
## Number of edges: 253293
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9636
## Number of communities: 4
## Elapsed time: 0 seconds
## [1] 0.2
## [1] "res.0.2"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 5568
## Number of edges: 253293
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9432
## Number of communities: 7
## Elapsed time: 0 seconds
## [1] 0.3
## [1] "res.0.3"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 5568
## Number of edges: 253293
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9247
## Number of communities: 8
## Elapsed time: 0 seconds
## [1] 0.4
## [1] "res.0.4"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 5568
## Number of edges: 253293
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9057
## Number of communities: 8
## Elapsed time: 0 seconds
## [1] 0.5
## [1] "res.0.5"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 5568
## Number of edges: 253293
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8918
## Number of communities: 10
## Elapsed time: 0 seconds
## [1] 0.6
## [1] "res.0.6"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 5568
## Number of edges: 253293
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8784
## Number of communities: 10
## Elapsed time: 0 seconds
## [1] 0.7
## [1] "res.0.7"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 5568
## Number of edges: 253293
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8657
## Number of communities: 11
## Elapsed time: 0 seconds
## [1] 0.8
## [1] "res.0.8"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 5568
## Number of edges: 253293
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8550
## Number of communities: 14
## Elapsed time: 0 seconds

Iterating over 0.2 to 0.7 clustering resolution, .3 looks stable. Additional divisions at .5

clustree(maternal, prefix = "res.", node_colour = "sc3_stability") + theme(legend.position = "bottom") + guides(edge_alpha = F)
## Warning: The `add` argument of `group_by()` is deprecated as of dplyr 1.0.0.
## Please use the `.add` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

DimPlot(maternal, group.by = 'res.0.1', label = T, pt.size = .25)

DimPlot(maternal, group.by = 'res.0.2', label = T, pt.size = .25)

DimPlot(maternal, group.by = 'res.0.3', label = T, pt.size = .25)

DimPlot(maternal, group.by = 'res.0.4', label = T, pt.size = .25)

DimPlot(maternal, group.by = 'res.0.5', label = T, pt.size = .25)

DimPlot(maternal, group.by = 'res.0.6', label = T, pt.size = .25)

DimPlot(maternal, group.by = 'res.0.7', label = T, pt.size = .25)

DimPlot(maternal, group.by = 'res.0.8', label = T, pt.size = .25)

# Set default to 0.7 for now
maternal <- FindClusters(maternal, res = 0.7)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 5568
## Number of edges: 253293
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8657
## Number of communities: 11
## Elapsed time: 0 seconds
DimPlot(maternal, group.by = 'seurat_clusters', label = T, repel = T, pt.size = .25, label.size=  4) + NoLegend()

Cluster 6 is a plasma cell based on differential expression lookup - lots of immunoglobulins overxpressed compared to B cells.

b.cell.markers <- FindMarkers(maternal, ident.1 = "6", ident.2 = "2")
maternal.markers.res.0.4 <- FindAllMarkers(maternal)

Initial findDoubletClusters by cluster doesn’t readily identify any clusters. Cluster 11 is a little low on DE.

maternal.sce <- as.SingleCellExperiment(maternal)
dbl <- findDoubletClusters(maternal.sce, clusters = maternal.sce$seurat_clusters)
dbl.df <- as.data.frame(dbl)
#saveRDS(dbl.df, file = paste0(data_dir, "maternal.res.0.2.subcluster.labelled.initial.doublet.finder", Sys.Date(), ".rda"))

Try simulation approach to finding doublets. Clusters 10 and 11 stand out on VlnPlot.

dbl.dens <- computeDoubletDensity(maternal.sce)
maternal$dbl.dens <- dbl.dens
FeaturePlot(maternal, features = "dbl.dens")

VlnPlot(maternal, features = "dbl.dens")

Key gene expression markers

# Monocyte (and potential CD1C+ Dendritic Cell subset)
FeaturePlot(object = maternal,
            features = c("CD1C", "CD14"),
            cols = c("lightgrey", "blue"),
            pt.size = .1)

FeaturePlot(object = maternal,
            features = c("CD1C", "CLEC9A"),
            cols = c("lightgrey", "blue"),
            pt.size = .1)

# FCGR3A+ Monocytes
FeaturePlot(object = maternal,
            features = c("FCGR3A", "MS4A7"),
            cols = c("lightgrey", "blue"),
            pt.size = .1)

# B Cells
FeaturePlot(object = maternal,
            features = c("CD79A"),
            cols = c("lightgrey", "blue"),
            pt.size = .1)

# pDC ref: "Single-cell RNA-seq reveals new types of human blood dendritic cells, monocytes and progenitors"
# Looks like pDC are actually off the tail of 
FeaturePlot(object = maternal,
            features = c("HLA-DRA"),
            cols = c("lightgrey", "blue"),
            pt.size = .1)

FeaturePlot(object = maternal,
            features = c("CLEC4C", "IL3RA"),
            cols = c("lightgrey", "blue"),
            pt.size = .1)

FeaturePlot(object = maternal,
            features = c("GZMB", "SERPINF1"),
            cols = c("lightgrey", "blue"),
            pt.size = .1)

FeaturePlot(object = maternal,
            features = c("IL3RA"),
            cols = c("lightgrey", "blue"),
            pt.size = .1)

# DC 1, bizarrely, no CLEC9A expression, which is supposed to clearly delineate a THBD+ subset, but shows up in the small cluster thought to be pDC
FeaturePlot(object = maternal,
            features = c("THBD", "CLEC9A"),
            cols = c("lightgrey", "blue"),
            pt.size = .1)

# RBCs
FeaturePlot(object = maternal,
            features = c("HBB", "HBG2"),
            cols = c("lightgrey", "blue"),
            pt.size = .1)

t.cells <- subset(maternal, idents = c("2", "3", "4", "6", "7", "9", "10"))
t.cells <- pipe.fast.mnn(t.cells, batch = "biorep")
## Computing 2000 integration features
## No variable features found for object1 in the object.list. Running FindVariableFeatures ...
## No variable features found for object2 in the object.list. Running FindVariableFeatures ...
## No variable features found for object3 in the object.list. Running FindVariableFeatures ...
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -1.8832
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.32088
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 7.8799e-028
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.031008
## No variable features found for object4 in the object.list. Running FindVariableFeatures ...
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -2.0995
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.32131
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 4.6566e-028
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.031008
## No variable features found for object5 in the object.list. Running FindVariableFeatures ...
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : at -1.3395
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : radius 0.00029952
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : all data on boundary of neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -1.3395
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.017307
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : zero-width neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.031008
## Warning: Keys should be one or more alphanumeric characters followed by an
## underscore, setting key from mnn.reconstructed_ to mnnreconstructed_
## 23:07:58 UMAP embedding parameters a = 0.9922 b = 1.112
## 23:07:58 Read 2907 rows and found 30 numeric columns
## 23:07:58 Using Annoy for neighbor search, n_neighbors = 30
## 23:07:58 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 23:07:58 Writing NN index file to temp file C:\Users\Kyle\AppData\Local\Temp\Rtmpms6QbV\filed93057f8214a
## 23:07:58 Searching Annoy index using 1 thread, search_k = 3000
## 23:07:58 Annoy recall = 100%
## 23:07:59 Commencing smooth kNN distance calibration using 1 thread
## 23:08:00 Initializing from normalized Laplacian + noise
## 23:08:00 Commencing optimization for 500 epochs, with 127384 positive edges
## 23:08:07 Optimization finished
## Computing nearest neighbor graph
## Computing SNN
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 2907
## Number of edges: 129417
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8871
## Number of communities: 4
## Elapsed time: 0 seconds

0.4 looks stable, another split at 0.7

t.cells <- Seurat_clustree(t.cells, seq(0.1, 0.8, by = 0.1))
## [1] 0.1
## [1] "res.0.1"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 2907
## Number of edges: 129417
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9257
## Number of communities: 3
## Elapsed time: 0 seconds
## [1] 0.2
## [1] "res.0.2"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 2907
## Number of edges: 129417
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8871
## Number of communities: 4
## Elapsed time: 0 seconds
## [1] 0.3
## [1] "res.0.3"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 2907
## Number of edges: 129417
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8585
## Number of communities: 6
## Elapsed time: 0 seconds
## [1] 0.4
## [1] "res.0.4"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 2907
## Number of edges: 129417
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8360
## Number of communities: 6
## Elapsed time: 0 seconds
## [1] 0.5
## [1] "res.0.5"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 2907
## Number of edges: 129417
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8165
## Number of communities: 8
## Elapsed time: 0 seconds
## [1] 0.6
## [1] "res.0.6"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 2907
## Number of edges: 129417
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7988
## Number of communities: 9
## Elapsed time: 0 seconds
## [1] 0.7
## [1] "res.0.7"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 2907
## Number of edges: 129417
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7822
## Number of communities: 9
## Elapsed time: 0 seconds
## [1] 0.8
## [1] "res.0.8"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 2907
## Number of edges: 129417
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7662
## Number of communities: 9
## Elapsed time: 0 seconds
clustree(t.cells, prefix = "res.", node_colour = "sc3_stability") + theme(legend.position = "bottom") + guides(edge_alpha = F)
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

DimPlot(t.cells, group.by = 'res.0.1', label = T, pt.size = .25)

DimPlot(t.cells, group.by = 'res.0.2', label = T, pt.size = .25)

DimPlot(t.cells, group.by = 'res.0.3', label = T, pt.size = .25)

DimPlot(t.cells, group.by = 'res.0.4', label = T, pt.size = .25)

DimPlot(t.cells, group.by = 'res.0.5', label = T, pt.size = .25)

DimPlot(t.cells, group.by = 'res.0.6', label = T, pt.size = .25)

DimPlot(t.cells, group.by = 'res.0.7', label = T, pt.size = .25)

DimPlot(t.cells, group.by = 'res.0.8', label = T, pt.size = .25)

Clusters 6 and 7, the same clusters from above look like doublet clusters

FeaturePlot(t.cells, features = "dbl.dens", max.cutoff = 7)

VlnPlot(t.cells, features = "dbl.dens")

Cluster 5 is of low quality relative to the rest and DEX overrepresents ribosomal, mitochondrial, and stress response genes. Will drop that along with the doublet clusters 6 and 7.

VlnPlot(t.cells, features = "subsets_Mito_percent")

t.cells <- FindClusters(t.cells, resolution = 0.8)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 2907
## Number of edges: 129417
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7662
## Number of communities: 9
## Elapsed time: 0 seconds
doublets.tcells <- WhichCells(t.cells, ident = c("5", "6", "7"))
t.cells <- subset(t.cells, cells = doublets.tcells, invert = T)
DimPlot(t.cells, group.by = 'seurat_clusters', label = T, pt.size = .25)

t.cells <- pipe.fast.mnn(t.cells, "biorep")
## Computing 2000 integration features
## No variable features found for object1 in the object.list. Running FindVariableFeatures ...
## No variable features found for object2 in the object.list. Running FindVariableFeatures ...
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -2.7222
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.30103
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 6.2318e-015
## No variable features found for object3 in the object.list. Running FindVariableFeatures ...
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : at -1.7434
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : radius 0.000366
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : all data on boundary of neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -1.7434
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.019131
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.090619
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : zero-width neighborhood. make span bigger
## No variable features found for object4 in the object.list. Running FindVariableFeatures ...
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -2.0243
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.32099
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.0804e-028
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.090619
## No variable features found for object5 in the object.list. Running FindVariableFeatures ...
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : at -1.1628
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : radius 0.00027665
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : all data on boundary of neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -1.1628
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.016633
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : zero-width neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.090619
## Warning in .refine_k(k, precomputed, query = TRUE): 'k' capped at the number of
## observations
## Warning: Keys should be one or more alphanumeric characters followed by an
## underscore, setting key from mnn.reconstructed_ to mnnreconstructed_
## 23:08:30 UMAP embedding parameters a = 0.9922 b = 1.112
## 23:08:30 Read 2608 rows and found 30 numeric columns
## 23:08:30 Using Annoy for neighbor search, n_neighbors = 30
## 23:08:30 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 23:08:30 Writing NN index file to temp file C:\Users\Kyle\AppData\Local\Temp\Rtmpms6QbV\filed9306a4242f2
## 23:08:30 Searching Annoy index using 1 thread, search_k = 3000
## 23:08:30 Annoy recall = 100%
## 23:08:31 Commencing smooth kNN distance calibration using 1 thread
## 23:08:32 Initializing from normalized Laplacian + noise
## 23:08:32 Commencing optimization for 500 epochs, with 116186 positive edges
## 23:08:39 Optimization finished
## Computing nearest neighbor graph
## Computing SNN
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 2608
## Number of edges: 130774
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8995
## Number of communities: 5
## Elapsed time: 0 seconds
DimPlot(t.cells)

Resolution 0.2 avoid the complexity of CD8+ Cytotoxic cells.

t.cells <- Seurat_clustree(t.cells, seq(0.1, 0.8, by = 0.1))
## [1] 0.1
## [1] "res.0.1"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 2608
## Number of edges: 130774
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9380
## Number of communities: 4
## Elapsed time: 0 seconds
## [1] 0.2
## [1] "res.0.2"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 2608
## Number of edges: 130774
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8995
## Number of communities: 5
## Elapsed time: 0 seconds
## [1] 0.3
## [1] "res.0.3"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 2608
## Number of edges: 130774
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8694
## Number of communities: 6
## Elapsed time: 0 seconds
## [1] 0.4
## [1] "res.0.4"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 2608
## Number of edges: 130774
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8471
## Number of communities: 6
## Elapsed time: 0 seconds
## [1] 0.5
## [1] "res.0.5"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 2608
## Number of edges: 130774
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8256
## Number of communities: 6
## Elapsed time: 0 seconds
## [1] 0.6
## [1] "res.0.6"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 2608
## Number of edges: 130774
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8041
## Number of communities: 6
## Elapsed time: 0 seconds
## [1] 0.7
## [1] "res.0.7"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 2608
## Number of edges: 130774
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7838
## Number of communities: 7
## Elapsed time: 0 seconds
## [1] 0.8
## [1] "res.0.8"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 2608
## Number of edges: 130774
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7663
## Number of communities: 7
## Elapsed time: 0 seconds
clustree(t.cells, prefix = "res.", node_colour = "sc3_stability") + theme(legend.position = "bottom") + guides(edge_alpha = F)
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

DimPlot(t.cells, group.by = 'res.0.1', label = T, pt.size = .25)

DimPlot(t.cells, group.by = 'res.0.2', label = T, pt.size = .25)

DimPlot(t.cells, group.by = 'res.0.3', label = T, pt.size = .25)

DimPlot(t.cells, group.by = 'res.0.4', label = T, pt.size = .25)

DimPlot(t.cells, group.by = 'res.0.5', label = T, pt.size = .25)

DimPlot(t.cells, group.by = 'res.0.6', label = T, pt.size = .25)

DimPlot(t.cells, group.by = 'res.0.7', label = T, pt.size = .25)

DimPlot(t.cells, group.by = 'res.0.8', label = T, pt.size = .25)

Set default to 0.4

t.cells <- FindClusters(t.cells, resolution = 0.4)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 2608
## Number of edges: 130774
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8471
## Number of communities: 6
## Elapsed time: 0 seconds
t.cells.markers <- FindAllMarkers(t.cells)

Differentiating CD4 from CD8 T cells https://satijalab.org/seurat/v3.1/pbmc3k_tutorial.html Seurat cell type assignment

# Naive CD4+
FeaturePlot(object = t.cells,
            features = c("IL7R", "CCR7"),
            cols = c("lightgrey", "blue"),
            pt.size = .1)

FeaturePlot(object = t.cells,
            features = c("CD4", "CCR7"),
            cols = c("lightgrey", "blue"),
            pt.size = .1)

FeaturePlot(object = t.cells,
            features = c("CD8A", "CCR7"),
            cols = c("lightgrey", "blue"),
            pt.size = .1)

# T Cell naive markers, not really used
FeaturePlot(object = fetal,
            features = c("SELL", "LRRN3"),
            cols = c("lightgrey", "blue"),
            pt.size = .1)

# Memory CD4+
FeaturePlot(object = t.cells,
            features = c("IL7R", "S100A4"),
            cols = c("lightgrey", "blue"),
            pt.size = .1)

# T regs do not come out clearly, not used
FeaturePlot(object = t.cells,
            features = c("IL2RA", "FOXP3"),
            cols = c("lightgrey", "blue"),
            pt.size = .1)

# TRM/TEM markers found in some CD8+ T cells, not used
FeaturePlot(object = t.cells,
            features = c("CXCR6"),
            cols = c("lightgrey", "blue"),
            pt.size = .1)

FeaturePlot(object = t.cells,
            features = c("ITGA1"),
            cols = c("lightgrey", "blue"),
            pt.size = .1)

# NCAM1 = CD56, NK specific marker, but can also be expressed by T cells; KLRB1 is NK1.1 (CD161)
FeaturePlot(object = t.cells,
            features = c("NCAM1", "KLRB1"),
            cols = c("lightgrey", "blue"),
            pt.size = .1)

# Expressed in NKs, "NK cell–intrinsic FcεRIγ limits CD8+ T-cell expansion and thereby turns an acute into a chronic viral infection"
FeaturePlot(object = t.cells,
            features = c("FCER1G"),
            cols = c("lightgrey", "blue"),
            pt.size = .1)

# NK subtypes GZMB vs. GZMK
FeaturePlot(object = t.cells,
            features = c("GZMB", "GZMK", "CD3D", "KLRB1", "NKG7", "FGFBP2"),
            cols = c("lightgrey", "blue"),
            pt.size = .1)

FeaturePlot(object = t.cells,
            features = c("CD8A", "KLRB1"),
            cols = c("lightgrey", "blue"),
            pt.size = .1)

FeaturePlot(object = t.cells,
            features = c("CD3E", "KLRB1"),
            cols = c("lightgrey", "blue"),
            pt.size = .1)

FeaturePlot(object = t.cells,
            features = c("CD3D", "NCAM1"),
            cols = c("lightgrey", "blue"),
            pt.size = .1)

FeaturePlot(object = t.cells,
            features = c("CD3D", "KLRB1"),
            cols = c("lightgrey", "blue"),
            pt.size = .1)

“Single-cell transcriptomic landscape of nucleated cells in umbilical cord blood” KLRB1+ in the T cell CD8+ Cytotoxic is different from the publication. Otherwise, it seems that cluster 5 is NKT due to CD3E and NK markers and clusters 4 vs. 6 distinguish NK subtypes GZMB+ vs GZMK+.

VlnPlot(t.cells, features = "CD3E")

VlnPlot(t.cells, features = "CD4")

VlnPlot(t.cells, features = "CD8A")

VlnPlot(t.cells, features = "KLRB1")

VlnPlot(t.cells, features = c("GZMB", "GZMK"))

VlnPlot(t.cells, features = "CD34")

t.cells <- RenameIdents(t.cells,
                      "0" = "CD8+ Cytotoxic T Cells",
                      "1" = "Naive CD8+ T Cells",
                      "2" = "Natural Killer Cells",
                      "3" = "Naive CD4+ T Cells")
DimPlot(t.cells, group.by = 'ident', label = T, repel = T, pt.size = .25) + NoLegend()

# saveRDS(t.cells, file = paste0(data_dir, "t_cells_subcluster", Sys.Date(), ".rda"))
doublet.dens.plot <- FeaturePlot(maternal, features = "dbl.dens")
#b.cell.doublets <- CellSelector(doublet.dens.plot)
cluster.doublet <- WhichCells(maternal, idents = "8")
maternal.cells.to.drop <- c(cluster.doublet, doublets.tcells, b.cell.doublets)

Overwrite coarse labels with T cell subcluster labels

maternal <- SetIdent(maternal, cells = WhichCells(t.cells), value = Idents(t.cells))
maternal <- subset(maternal, cells = maternal.cells.to.drop, invert = T)

Rename maternal clusters

maternal <- RenameIdents(maternal,
                      "2" = "B Cells",
                      "6" = "Plasma Cells",
                      "4" = "FCGR3A+ Monocytes", 
                      "1" = "CD14+ Monocytes")
# Stash identities for later use if needed
maternal <- StashIdent(maternal, "maternal.res.0.2.cell.type.labelled")
## With Seurat 3.X, stashing identity classes can be accomplished with the following:
## maternal[["maternal.res.0.2.cell.type.labelled"]] <- Idents(object = maternal)
doublet.density.maternal <- FeaturePlot(maternal, features = "dbl.dens")
#tcell.leftovers <- CellSelector(doublet.density.maternal)
#mono.tcell.doublets <- CellSelector(doublet.density.maternal)
#bcell.tcell.doubets <- CellSelector(doublet.density.maternal)
additional.doublets.to.drop <- c(tcell.leftovers, mono.tcell.doublets, bcell.tcell.doubets)
maternal <- subset(maternal, cells = additional.doublets.to.drop, invert = T)

Overwrite coarse cell type labels with subclustering labels.

maternal <- StashIdent(maternal, "maternal.res.0.4.subclustered.labelled")
## With Seurat 3.X, stashing identity classes can be accomplished with the following:
## maternal[["maternal.res.0.4.subclustered.labelled"]] <- Idents(object = maternal)
#saveRDS(maternal, paste0(data_dir,"cleaned_maternal_seurat", Sys.Date(), ".rda"))
# Read cleaned data in
maternal <- readRDS(paste0(data_dir, "cleaned_maternal_seurat2020-12-22.rda"))
all.dropped.cells <- c(maternal.cells.to.drop, additional.doublets.to.drop)
#saveRDS(all.dropped.cells, paste0(data_dir, "maternal_dropped_cells_", Sys.Date(), ".rda"))

Rename cell type clusters with Maternal label

idents <- Idents(maternal)
cell.type <- paste0("Maternal ", idents)
Idents(maternal) <- cell.type
maternal <- StashIdent(maternal, "cell.type")
## With Seurat 3.X, stashing identity classes can be accomplished with the following:
## maternal[["cell.type"]] <- Idents(object = maternal)
#saveRDS(named.maternal, paste0(data_dir, "named_cleaned_maternal_seurat_2020-12-22.rda"))
named.maternal <- readRDS(paste0(data_dir, "named_cleaned_maternal_seurat_2020-12-22.rda"))
DimPlot(named.maternal, label = T, repel = T, pt.size = .25, label.size = 4) + NoLegend()

DimPlot(maternal, label = T, repel = T, pt.size = .25, label.size = 4) + NoLegend()

#ggsave(filename = paste0(data_dir, "maternal_cleaned_", Sys.Date(), ".png"), device = "png")

Combine fetal and maternal datasets

# Read in and merge fetal and maternal datasets
fetal <- readRDS(paste0(data_dir,"named_cleaned_fetal_seurat_2020-12-22.rda"))
maternal <- readRDS(paste0(data_dir,"named_cleaned_maternal_seurat_2020-12-22.rda"))
seu <- merge(fetal, maternal)
rm(fetal)    # Memory clean-up
rm(maternal) # Memory clean-up
umap.fast.mnn <- function(seu, batch) {
  seu <- RunFastMNN(object.list = SplitObject(seu, split.by = batch), verbose = T)
  #print(seu@tools$RunFastMNN@metadata$merge.info$lost.var)
  seu <- RunUMAP(seu, reduction = "mnn", dims = 1:30)
  return(seu)
}
seu <- umap.fast.mnn(seu, "biorep")
## Computing 2000 integration features
## No variable features found for object1 in the object.list. Running FindVariableFeatures ...
## No variable features found for object2 in the object.list. Running FindVariableFeatures ...
## No variable features found for object3 in the object.list. Running FindVariableFeatures ...
## No variable features found for object4 in the object.list. Running FindVariableFeatures ...
## No variable features found for object5 in the object.list. Running FindVariableFeatures ...
## Warning: Keys should be one or more alphanumeric characters followed by an
## underscore, setting key from mnn.reconstructed_ to mnnreconstructed_
## 23:09:27 UMAP embedding parameters a = 0.9922 b = 1.112
## 23:09:27 Read 15532 rows and found 30 numeric columns
## 23:09:27 Using Annoy for neighbor search, n_neighbors = 30
## 23:09:27 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 23:09:28 Writing NN index file to temp file C:\Users\Kyle\AppData\Local\Temp\Rtmpms6QbV\filed930720e39b0
## 23:09:28 Searching Annoy index using 1 thread, search_k = 3000
## 23:09:31 Annoy recall = 100%
## 23:09:31 Commencing smooth kNN distance calibration using 1 thread
## 23:09:33 Found 2 connected components, falling back to 'spca' initialization with init_sdev = 1
## 23:09:33 Initializing from PCA
## 23:09:33 PCA: 2 components explained 45.58% variance
## 23:09:33 Commencing optimization for 200 epochs, with 697214 positive edges
## 23:09:48 Optimization finished
DimPlot(seu, label = T, repel = T, pt.size = .25, label.size = 2.5) + NoLegend()

#ggsave(paste0(results_dir, "Maternal_Fetal_Combined_UMAP_", Sys.Date(), ".png"))
#saveRDS(seu, paste0(data_dir,"cleaned_combined_seurat_2020-12-22.rda"))
seu <- readRDS(paste0(data_dir,"cleaned_combined_seurat_2020-12-22.rda"))

QC metrics by cluster

VlnPlot(seu, features = "subsets_Mito_percent", pt.size = .1) + NoLegend() + 
  theme(axis.text.x = element_text(size = 8)
        ) +
  ggtitle("Percentage of Reads Mapping to Mitochondrial Genes by Cluster")

ggsave(paste0(results_dir, "qc_cluster_mito_", Sys.Date(), ".png"))
## Saving 7 x 5 in image
VlnPlot(seu, features = "nCount_RNA", pt.size = .1) + NoLegend() + 
  theme(axis.text.x = element_text(size = 8)
        ) +
  ggtitle("Total RNA Molecules Per Cell by Cluster")

ggsave(paste0(results_dir, "qc_cluster_total_RNA_", Sys.Date(), ".png"))
## Saving 7 x 5 in image
VlnPlot(seu, features = "nFeature_RNA", pt.size = .1) + NoLegend() + 
  theme(axis.text.x = element_text(size = 8)
        ) +
  ggtitle("Detected Genes Per Cell by Cluster")

#ggsave(paste0(results_dir, "qc_cluster_total_genes_", Sys.Date(), ".png"))
list <- SplitObject(seu, split.by = "fetal")
DimPlot(list$Fetal, label = T, repel = T, pt.size = .25, label.size = 2.5) + NoLegend()

#ggsave(paste0(results_dir, "Fetal_Combined_UMAP_", Sys.Date(), ".png"))
DimPlot(list$Maternal, label = T, repel = T, pt.size = .25, label.size = 2.5) + NoLegend()

#ggsave(paste0(results_dir, "Maternal_Combined_UMAP_", Sys.Date(), ".png"))